From b9a3bad6d8dfffb8f0aea9c42ba5977e51f65dc0 Mon Sep 17 00:00:00 2001 From: robertl Date: Sun, 1 Feb 2004 05:04:46 +0000 Subject: [PATCH] strip_html: new fn. --- gpsbabel/util.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/gpsbabel/util.c b/gpsbabel/util.c index 45612bd3e..f30a05d81 100644 --- a/gpsbabel/util.c +++ b/gpsbabel/util.c @@ -745,6 +745,52 @@ char * str_utf8_to_ascii( const char * str ) return result; } +/* + * Without getting into all the complexity of technically legal HTML, + * this function tries to strip "ugly" parts of it to make it more + * pleasant for a human reader. Yes, this falls down in all kinds of + * ways such as spaces within the tags, etc. + */ +char * +strip_html(utf_string *in) +{ + char *outstring, *out; + int ctr; + char *instr = in->utfstring; + + if (!in->is_html) + return in->utfstring; + /* + * We only shorten, so just dupe the input buf for space. + */ + out = outstring = xstrdup(in->utfstring); + outstring[0] = 0; + + for(ctr=0; ; instr++) { + switch(*instr) { + case 0: + fprintf(stderr, "%s\n", out); + return (out); + + case '<': + fprintf(stderr, "\n+"); + if (instr[1] == 'p') + *outstring++ = '\n'; + ctr++; + break; + case '>': + ctr--; + break; + case '\n': + continue; + default: + if (ctr == 0) { + *outstring++ = *instr; + } + } + } +} + char * xml_entitize(const char * str) { int elen, ecount, nsecount; -- 2.30.2